library(readxl)
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(KernSmooth)
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
library(MASS)
library(VineCopula)
## Warning: package 'VineCopula' was built under R version 4.2.3
library(corrplot)
## corrplot 0.92 loaded
library(ggplot2)
library(fitdistrplus)
## Loading required package: survival
library(copula)
## Warning: package 'copula' was built under R version 4.2.3
## 
## Attaching package: 'copula'
## The following object is masked from 'package:VineCopula':
## 
##     pobs
library(kdecopula)
## Warning: package 'kdecopula' was built under R version 4.2.3
#install.packages("htmltools")
#update.packages()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(plot3D)
## Warning: package 'plot3D' was built under R version 4.2.3
library(moments)
library(gridExtra)
library(cubature)
## Warning: package 'cubature' was built under R version 4.2.3
# Download Apple (AAPL) daily prices, 2012-2022 (quantmod assigns AAPL
# into the global environment).
getSymbols("AAPL", from = "2012-01-01", to = "2022-12-31")
## [1] "AAPL"
# Download Microsoft (MSFT) prices over the same window, returned as a
# value instead of being auto-assigned.
msft <- getSymbols("MSFT", from = "2012-01-01", to = "2022-12-31", auto.assign = FALSE)

# Daily log-returns of the closing prices of both series.
aapl_ret <- diff(log(Cl(AAPL)))
msft_ret <- diff(log(Cl(msft)))
data <- data.frame(Date = index(AAPL), Cl(AAPL), AAPL = aapl_ret, Cl(msft), MSFT = msft_ret)
data <- na.omit(data)
write.csv(data, file = "datasetdraft.csv", row.names = FALSE)
# Extract the two variables of interest: the AAPL and MSFT log-return
# columns (the ".1" suffix presumably comes from data.frame() renaming
# duplicated "Close" column names — verify against names(data)).
X <- data$AAPL.Close.1
Y <- data$MSFT.Close.1

n <- length(X)
plot(X, Y, xlab = "AAPL", ylab = "MSFT", main = "La concentration moyenne ", 
     col = "blue", cex = 0.5)
# Empirical marginal distribution (pseudo-observations).
# Maps each observation to its normalized rank, strictly inside (0, 1).
#
# Fix: uses rank(X) / (n + 1) instead of rank(X) / n. Dividing by n maps
# the sample maximum to exactly 1, which makes qnorm(F_n) = Inf in the
# Van der Waerden statistic computed later, and is inconsistent with the
# rank / (n + 1) rescaling used for `data_rank` further down the script.
#
# X : numeric vector of observations.
# Returns a numeric vector of the same length with values in (0, 1).
Marge_empirique <- function (X) { 
  n <- length(X)
  rank(X) / (n + 1)
}
# Pseudo-observations (empirical margins) of each return series.
F_n <-  Marge_empirique(X)
G_n <-  Marge_empirique(Y)
#A_n <- ecdf(X)(X)
#B_n <- ecdf(Y)(Y)
# Detecting dependence
# a. Scatter plot of the raw returns next to the rank-rank plot of the
#    margins.
par(mfrow = c(1, 2))
plot(X, Y, xlab = "AAPL", ylab = "MSFT", col = "skyblue2", main = "Digramme de dispersion")
plot(F_n, G_n, xlab = "AAPL", ylab = "MSFT", col = "rosybrown2", main = "Rank-rank plot") 

# The rank-rank plot of the two variables resembles that of a Student-t
# copula.
# b. Chi-plot (VineCopula)
# NOTE(review): mode = "NULL" passes the *string* "NULL", not the value
# NULL — confirm which one BiCopChiPlot expects.

BiCopChiPlot(F_n, G_n, mode = "NULL", ylim = c(-0.5, 0.5), main = "Khi-plot", col = "lightgreen", cex = 0.5)

# K-plot (VineCopula)

BiCopKPlot(F_n, G_n, PLOT = TRUE, main = "K-plot", col = "slateblue")

#I. Independence tests

# The margins are clearly non-Gaussian, so rank-based (non-parametric)
# tests are used.

# Spearman test
test.spearman <- cor.test(X, Y, method = "spearman", exact = FALSE)
test.spearman
## 
##  Spearman's rank correlation rho
## 
## data:  X and Y
## S = 1.584e+09, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5513852
# Strong, highly significant positive rank correlation (rho ~ 0.55).

# Kendall test
test.kendall <- cor.test(X, Y, method = "kendall")
test.kendall
## 
##  Kendall's rank correlation tau
## 
## data:  X and Y
## z = 31.559, p-value < 2.2e-16
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
##       tau 
## 0.4002385
# Again a strong positive association (tau ~ 0.40).
# Van der Waerden (normal-scores) test

# Test statistic: average product of the normal scores of the margins.
# NOTE(review): if the margins contain exactly 1 (rank/n scaling),
# qnorm(1) = Inf and the statistic degenerates — confirm the margins are
# scaled into the open interval (0, 1).
Van <- 1/length(X) * sum(qnorm(F_n, 0, 1) * qnorm(G_n, 0, 1))
# NOTE(review): verify this variance formula — the outer ^2 applied to
# the whole sum of squared normal scores looks suspicious; check against
# a reference for the normal-scores test.
Var_nVan <- 1/(length(X) - 1) * (sum(qnorm(c(1: n) / (n + 1), 0, 1) ^ 2) ^ 2)

t_statisque <- Van / sqrt(Var_nVan) * n
# NOTE(review): (1 - pnorm(t)) * 2 is a valid two-sided p-value only when
# t >= 0; the general form is 2 * (1 - pnorm(abs(t))).
p_value <- (1 - pnorm(t_statisque, 0, 1)) * 2
p_value # p-value ~ 0, extremely small: reject the independence hypothesis H0
## [1] 0
# All three tests indicate that the two variables are dependent.
# Empirical copula density: counts the pseudo-observations falling in a
# step x step window centred at (u, v), normalized by n * step^2.
# X, Y : raw samples of equal length; u, v : evaluation points (may be
# vectors, recycled together); step : window width.
empirique_Copula <- function(X, Y, u, v, step) {
  if (length(X) != length(Y)) stop("X and Y must have equal length")
  n <- length(X)
  rX <- rank(X) / n
  rY <- rank(Y) / n
  inside <- 0
  for (k in seq_len(n)) {
    hit_x <- (u - step / 2 <= rX[k]) & (rX[k] <= u + step / 2)
    hit_y <- (v - step / 2 <= rY[k]) & (rY[k] <= v + step / 2)
    inside <- inside + (hit_x & hit_y)
  }
  inside / (n * step * step)
}

databrut <- cbind(X,Y)

# Convert the raw returns to ranks rescaled into (0, 1) (pseudo-
# observations); dividing by n + 1 keeps every value strictly inside the
# unit square.
data_rank <- apply(databrut, 2, rank) / (n + 1)
# Bivariate kernel density of the pseudo-observations on a 20 x 20 grid
# (MASS::kde2d), used for the surface plot below.
kde <- kde2d(data_rank[,1], data_rank[,2], n = 20)

U1 <- seq(0,1,length.out =20)
U2 <- seq(0,1,length.out =20)
grid <- expand.grid(U1 = U1, U2 = U2)


plot_ly(x = kde$x, y = kde$y, z = kde$z, type = "surface")
# Empirical copula density evaluated on the grid with window width 1/20,
# reshaped to 20 x 20 (column-major, matching expand.grid ordering).
z_brut <- matrix(empirique_Copula(data_rank[,1], data_rank[,2], grid$U1, grid$U2, 1/20), nrow = 20, ncol = 20)

z_brut
##            [,1]      [,2]      [,3]      [,4]      [,5]      [,6]      [,7]
##  [1,] 4.6259487 1.8792917 0.7228045 0.5782436 0.2891218 0.0000000 0.1445609
##  [2,] 2.7466570 6.2161185 1.7347308 1.4456090 1.1564872 0.7228045 0.4336827
##  [3,] 0.5782436 2.8912179 3.3249006 1.8792917 1.4456090 1.8792917 0.8673654
##  [4,] 0.1445609 2.1684134 2.8912179 3.0357788 1.8792917 1.7347308 0.8673654
##  [5,] 0.0000000 1.4456090 2.0238525 1.4456090 2.0238525 1.8792917 0.8673654
##  [6,] 0.1445609 1.0119263 0.5782436 2.3129743 0.8673654 1.7347308 1.5901699
##  [7,] 0.0000000 0.2891218 0.5782436 1.3010481 1.7347308 1.7347308 1.5901699
##  [8,] 0.5782436 0.0000000 1.0119263 1.3010481 1.4456090 1.4456090 1.1564872
##  [9,] 0.0000000 0.1445609 0.8673654 0.8673654 1.0119263 1.1564872 2.4575352
## [10,] 0.1445609 0.2891218 1.0119263 0.5782436 1.1564872 1.0119263 1.3010481
## [11,] 0.1445609 0.2891218 1.0119263 0.5782436 1.7347308 1.4456090 1.0119263
## [12,] 0.1445609 0.1445609 0.7228045 0.0000000 0.5782436 0.5782436 1.3010481
## [13,] 0.0000000 0.5782436 0.2891218 0.5782436 0.8673654 0.7228045 0.8673654
## [14,] 0.1445609 0.1445609 0.5782436 0.5782436 0.2891218 0.7228045 1.4456090
## [15,] 0.0000000 0.1445609 0.5782436 0.8673654 0.5782436 0.4336827 0.7228045
## [16,] 0.0000000 0.0000000 0.1445609 0.7228045 0.5782436 0.2891218 0.1445609
## [17,] 0.0000000 0.1445609 0.2891218 0.4336827 0.2891218 0.5782436 0.5782436
## [18,] 0.0000000 0.5782436 0.1445609 0.1445609 0.7228045 0.1445609 0.1445609
## [19,] 0.0000000 0.1445609 0.2891218 0.2891218 0.1445609 0.2891218 0.8673654
## [20,] 0.1445609 0.4336827 0.2891218 0.1445609 0.0000000 0.2891218 0.2891218
##            [,8]      [,9]     [,10]     [,11]     [,12]     [,13]     [,14]
##  [1,] 0.1445609 0.0000000 0.0000000 0.1445609 0.1445609 0.0000000 0.1445609
##  [2,] 0.7228045 0.7228045 0.2891218 0.4336827 0.2891218 0.4336827 0.2891218
##  [3,] 0.5782436 1.4456090 0.2891218 0.5782436 0.7228045 0.4336827 0.4336827
##  [4,] 0.8673654 1.3010481 1.0119263 0.5782436 0.2891218 0.7228045 0.4336827
##  [5,] 0.7228045 1.5901699 1.1564872 1.1564872 0.5782436 0.2891218 1.1564872
##  [6,] 1.4456090 1.1564872 1.1564872 0.8673654 0.8673654 0.7228045 0.8673654
##  [7,] 2.1684134 1.4456090 1.3010481 1.3010481 1.4456090 1.4456090 0.5782436
##  [8,] 2.3129743 1.0119263 1.3010481 1.1564872 0.7228045 1.0119263 0.8673654
##  [9,] 1.4456090 1.1564872 1.1564872 1.8792917 1.8792917 1.7347308 0.7228045
## [10,] 1.7347308 1.4456090 2.3129743 1.3010481 0.5782436 1.1564872 1.1564872
## [11,] 1.1564872 1.1564872 1.3010481 1.0119263 0.7228045 0.7228045 1.7347308
## [12,] 1.0119263 1.4456090 1.8792917 1.5901699 1.0119263 1.7347308 0.5782436
## [13,] 0.8673654 1.1564872 1.3010481 1.3010481 1.8792917 1.8792917 1.8792917
## [14,] 0.8673654 1.1564872 0.7228045 1.4456090 1.8792917 0.8673654 1.1564872
## [15,] 0.8673654 1.1564872 0.7228045 1.3010481 1.1564872 1.4456090 2.0238525
## [16,] 0.7228045 0.7228045 1.4456090 1.1564872 1.3010481 0.5782436 2.3129743
## [17,] 0.7228045 0.5782436 1.1564872 0.5782436 1.3010481 0.8673654 1.1564872
## [18,] 0.0000000 0.0000000 0.2891218 0.4336827 1.0119263 2.1684134 0.8673654
## [19,] 0.4336827 0.2891218 0.5782436 0.8673654 0.8673654 0.5782436 0.8673654
## [20,] 0.0000000 0.1445609 0.1445609 0.0000000 0.4336827 0.5782436 0.1445609
##           [,15]     [,16]     [,17]     [,18]     [,19]     [,20]
##  [1,] 0.1445609 0.2891218 0.0000000 0.0000000 0.0000000 0.0000000
##  [2,] 0.4336827 0.1445609 0.5782436 0.1445609 0.1445609 0.1445609
##  [3,] 0.1445609 0.4336827 0.4336827 0.2891218 0.1445609 0.1445609
##  [4,] 0.2891218 0.2891218 0.2891218 0.2891218 0.1445609 0.0000000
##  [5,] 0.7228045 0.4336827 0.1445609 0.2891218 0.2891218 0.4336827
##  [6,] 1.0119263 0.4336827 1.3010481 0.7228045 0.0000000 0.0000000
##  [7,] 0.2891218 0.5782436 0.5782436 0.1445609 0.2891218 0.1445609
##  [8,] 1.1564872 0.8673654 0.7228045 0.1445609 0.2891218 0.7228045
##  [9,] 0.4336827 0.5782436 0.7228045 0.4336827 0.4336827 0.2891218
## [10,] 1.1564872 1.0119263 0.4336827 0.5782436 0.5782436 0.0000000
## [11,] 1.1564872 1.0119263 0.2891218 1.1564872 0.7228045 0.4336827
## [12,] 1.4456090 1.3010481 1.1564872 0.7228045 1.1564872 0.0000000
## [13,] 1.7347308 1.0119263 0.4336827 1.1564872 0.7228045 0.1445609
## [14,] 1.5901699 1.5901699 0.8673654 1.3010481 1.3010481 0.1445609
## [15,] 1.4456090 1.1564872 1.7347308 1.4456090 1.3010481 0.2891218
## [16,] 1.8792917 2.0238525 2.4575352 1.7347308 0.5782436 0.0000000
## [17,] 1.1564872 1.4456090 2.4575352 2.1684134 2.4575352 0.2891218
## [18,] 0.8673654 2.1684134 3.3249006 2.8912179 2.8912179 0.4336827
## [19,] 1.3010481 1.3010481 1.1564872 2.4575352 3.7585833 2.7466570
## [20,] 0.7228045 0.5782436 0.1445609 0.4336827 1.5901699 3.4694615
plot_ly(x = U1, y = U2, z = z_brut, type  = "surface")
#II. Copula estimation
#1. Parametric method
# Histogram / empirical density of each margin plus a Cullen-Frey graph
# (fitdistrplus) to see which classical families might fit.

plotdist(X, breaks = 20, histo = TRUE, demp = TRUE)

descdist(X, discrete=FALSE, boot=500)

## summary statistics
## ------
## min:  -0.137708   max:  0.1131575 
## median:  0.0006757409 
## mean:  0.0007878741 
## estimated sd:  0.01836405 
## estimated skewness:  -0.276231 
## estimated kurtosis:  8.771136
plotdist(Y, histo = TRUE, demp = TRUE)

descdist(Y, discrete=FALSE, boot=500)

## summary statistics
## ------
## min:  -0.1594534   max:  0.132929 
## median:  0.0006322637 
## mean:  0.000792413 
## estimated sd:  0.01673784 
## estimated skewness:  -0.232189 
## estimated kurtosis:  12.04573
# => X and Y do not seem to follow any standard distribution (heavy
# excess kurtosis). As an illustration, a normal fit is clearly rejected
# for Y below.
# NOTE(review): the starting values come from X while Y is being fitted —
# harmless as mere starting points, but confirm this is intentional.
fit1 <- fitdist(Y, "norm", lower = c(-Inf, 0), start = list(mean = mean(X), sd = sd(X)))

# Goodness-of-fit test of the fitted normal against Y.
ks.test(Y, "pnorm", mean = fit1$estimate["mean"], sd = fit1$estimate["sd"])
## Warning in ks.test.default(Y, "pnorm", mean = fit1$estimate["mean"], sd =
## fit1$estimate["sd"]): ties should not be present for the Kolmogorov-Smirnov test
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  Y
## D = 0.078589, p-value = 2.887e-15
## alternative hypothesis: two-sided
# p-value ~ 1e-15: Y does not follow the normal distribution.
#2. Semi-parametric
# Select the best-fitting bivariate copula family for the
# pseudo-observations (VineCopula, all families considered).
selectedCopula <- BiCopSelect(data_rank[,1], data_rank[,2], familyset = NA)
selectedCopula
## Bivariate copula: t (par = 0.59, par2 = 2.92, tau = 0.4)
# Selection picks a Student-t copula (rho ~ 0.59, df ~ 2.92).
# Refit with the copula package; the values below (rho = 0.59, df = 6)
# act as starting values and fitCopula then estimates both parameters by
# maximum likelihood (see rho.1 and df in the printed fit).
model_Copula <- tCopula(param = 0.59, dim = 2, dispstr="un", df=6) 

fitstudent <- fitCopula(model_Copula, data_rank, method = "ml")

fitstudent
## Call: fitCopula(model_Copula, data = data_rank, ... = pairlist(method = "ml"))
## Fit based on "maximum likelihood" and 2767 2-dimensional observations.
## Copula: tCopula 
##  rho.1     df 
## 0.5918 2.9221 
## The maximized loglikelihood is 700.3 
## Optimization converged
# Extract the fitted t-copula parameters (idiom fix: <- instead of =).
rho <- coef(fitstudent)[1] # estimated correlation parameter (~0.5918)
df <- coef(fitstudent)[2]  # estimated degrees of freedom (~2.9221)

#plot_3d <- persp(tCopula(dim=2,rho,df=df),dCopula, col ="salmon")
# 3D visualisation of the Student copula density on a 20 x 20 grid over
# [0, 1]^2.
# NOTE(review): the density is evaluated with `model_Copula` (par = 0.59,
# df = 6), i.e. the starting-value copula, not the fitted copula
# `fitstudent@copula` — confirm this is intended.
U1 <- seq(0, 1, length.out = 20)
U2 <- seq(0, 1, length.out = 20)
grid <- expand.grid(U1 = U1, U2 = U2)

# dCopula is vectorized over the rows of a matrix of evaluation points,
# so the previous element-by-element loop (growing a vector with
# append()) is unnecessary.
z_student <- dCopula(as.matrix(grid), model_Copula)
z_student <- matrix(z_student, nrow = length(U1), ncol = length(U2))


plot_ly(x = U1, y = U2, z = z_student, type = "surface")
# Goodness-of-fit test
# Parametric-bootstrap goodness-of-fit test (Sn statistic) of a t-copula
# with the degrees of freedom held at 6 (df.fixed = TRUE — presumably to
# keep df out of the bootstrap refits; confirm).
# Idiom fixes: TRUE instead of the reassignable alias T, and a plain
# string instead of c("t").
gof.t <- gofCopula(copula = ellipCopula(family = "t", df = 6, df.fixed = TRUE), x = data_rank)
## Warning in .gofPB(copula, x, N = N, method = method, estim.method =
## estim.method, : argument 'ties' set to TRUE
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
gof.t
## 
##  Parametric bootstrap-based goodness-of-fit test of t-copula, dim. d =
##  2, with 'method'="Sn", 'estim.method'="mpl":
## 
## data:  x
## statistic = 0.077476, parameter = 0.61355, p-value = 0.0004995
# Tiny p-value (~0.0005): the Sn test rejects the t-copula (df = 6) at
# the usual levels.

# Since the plots suggest tail dependence (at least lower tail dependence), we also try the Clayton copula to model the dependence between these two variables.

#Comme les graphs montrent qu'il y a des tail dependency entre les deux variables, on va choisir les familes des copules
# qui peuvent representer ces deux tail dependencie ou au moins lower tail dependency


#1.1 Clayton Copula:

# Fit a Clayton copula to the pseudo-observations by maximum
# pseudo-likelihood.
Clayton.fit <- fitCopula(claytonCopula(), data = data_rank, method = 'mpl')
Clayton.fit
## Call: fitCopula(claytonCopula(), data = data_rank, ... = pairlist(method = "mpl"))
## Fit based on "maximum pseudo-likelihood" and 2767 2-dimensional observations.
## Copula: claytonCopula 
## alpha 
## 1.335 
## The maximized loglikelihood is 540.8 
## Optimization converged
# Clayton copula object with the fitted parameter.
# NOTE(review): 1.335 is hard-coded from the printed fit; consider
# coef(Clayton.fit)[1] to avoid transcription drift.
Clayton.obs <- claytonCopula(param = 1.335)
Clayton.obs
## Clayton copula, dim. d = 2 
## Dimension:  2 
## Parameters:
##   alpha   = 1.335
# Result: alpha = 1.335, maximized loglikelihood 540.8, converged.

# 3D surface of the fitted Clayton density.
# dCopula accepts a matrix of evaluation points, so the density is
# computed on the whole grid at once instead of growing a vector with
# append() inside a loop.
z_clayton <- dCopula(as.matrix(grid), Clayton.obs)
z_clayton <- matrix(z_clayton, ncol = length(U1), nrow = length(U1))

plot_ly(x = U1, y = U2, z = z_clayton, type = "surface")
# Goodness-of-fit test
# Parametric-bootstrap GoF test (Sn statistic) of the Clayton copula
# against the pseudo-observations.
gof.Clayton <- gofCopula( Clayton.obs, data_rank)
## Warning in .gofPB(copula, x, N = N, method = method, estim.method =
## estim.method, : argument 'ties' set to TRUE
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52

## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
gof.Clayton
## 
##  Parametric bootstrap-based goodness-of-fit test of Clayton copula, dim.
##  d = 2, with 'method'="Sn", 'estim.method'="mpl":
## 
## data:  x
## statistic = 0.41974, parameter = 1.3347, p-value = 0.0004995
# The Clayton copula is also rejected (p ~ 0.0005), with a much larger
# Sn statistic (0.42) than the t-copula (0.077).
#3.Non parametric:

#3.1 Kernel Estimation

#3.1.1 Gaussian Kernel without reflection method

# Bivariate (product) Gaussian kernel with common bandwidth h:
#   K_h(u, v) = exp(-(u^2 + v^2) / (2 h^2)) / (2 pi h^2)
# written as a product of two univariate normal densities; arguments are
# vectorized elementwise.
gaussian_kernel <- function(u, v, h) {
  dnorm(u, mean = 0, sd = h) * dnorm(v, mean = 0, sd = h)
}

# Gaussian-kernel density estimate at (u, v): the average of the product
# Gaussian kernel centred at each observation.
# u, v : evaluation points (vectorized); data : n x 2 matrix of
# pseudo-observations; h : bandwidth.
kernel_density_estimation <- function(u, v, data, h) {
  contribs <- lapply(seq_len(nrow(data)), function(row) {
    gaussian_kernel(u - data[row, 1], v - data[row, 2], h)
  })
  Reduce(`+`, contribs) / nrow(data)
}


# Log-likelihood of the sample under the Gaussian-kernel density with
# bandwidth h (each point also contributes to its own density estimate).
# data : n x 2 matrix of pseudo-observations; h : bandwidth.
loglik_func_kernel <- function(data, h) {
  dens <- kernel_density_estimation(data[, 1], data[, 2], data, h)
  sum(log(dens))
}

# Bandwidth scan: log-likelihood of the sample under the Gaussian-kernel
# estimator for 10 bandwidths in [0, 0.3].
# NOTE(review): the first bandwidth is 0, which makes the kernel divide
# by zero — confirm whether the grid should start above 0.
test_h <- seq(0,0.3, length.out = 10 )
log_lik_ker <- c()
for (i in test_h){
  log_lik_ker <- append(log_lik_ker, loglik_func_kernel(data_rank, i))
}
plot(test_h, log_lik_ker)

# Kernel-density surfaces for several bandwidths on the 20 x 20 grid.
# NOTE(review): par(mfrow) controls base graphics and has no effect on
# plotly figures — probably leftover.
par(mfrow = c(2, 2))
plot_ly(x = U1, y = U2, z = matrix(kernel_density_estimation(grid$U1, grid$U2, data_rank, 0.01), nrow  = 20, ncol = 20), type  = "surface")
 plot_ly(x = U1, y = U2, z = matrix(kernel_density_estimation(grid$U1, grid$U2, data_rank, 0.05), nrow  = 20, ncol = 20), type  = "surface")
 plot_ly(x = U1, y = U2, z = matrix(kernel_density_estimation(grid$U1, grid$U2, data_rank, 0.07), nrow  = 20, ncol = 20), type  = "surface")
 plot_ly(x = U1, y = U2, z = matrix(kernel_density_estimation(grid$U1, grid$U2, data_rank, 0.1), nrow  = 20, ncol = 20), type  = "surface")
#3.1.2 Gaussian kernel with the reflection (boundary-correction) method
# Augment the sample with its reflections across the edges and corners of
# the unit square: each coordinate takes three images (2 - x, x, -x),
# yielding 9 copies of the data in total. Used to reduce boundary bias of
# the Gaussian kernel estimator on [0, 1]^2.
# data : n x 2 matrix/data.frame; returns a 9n x 2 data.frame with
# columns col1, col2.
reflection <- function(data) {
  # One image of a coordinate: reflect about 1, keep, or reflect about 0.
  flip <- function(x, code) {
    if (code == 1) 2 - x else if (code == 0) x else -x
  }
  pieces <- list()
  for (a in c(1, 0, -1)) {        # images of the first coordinate
    for (b in c(1, 0, -1)) {      # images of the second coordinate
      pieces[[length(pieces) + 1]] <- cbind(col1 = flip(data[, 1], a),
                                            col2 = flip(data[, 2], b))
    }
  }
  as.data.frame(do.call(rbind, pieces))
}

# Build the 9-fold reflected sample and compare it visually with the
# original pseudo-observations.
augmented_data <- reflection(data_rank)
par(mfrow = c(1, 2))
plot(augmented_data, main = "Data reflected", xlab = 'U', ylab = 'V')
plot(data_rank, main = "Data" , xlab = 'U', ylab = 'V')

# Gaussian-kernel density estimate with the reflection boundary
# correction, evaluated at (u, v).
# NOTE(review): the `data` argument is declared but never used — the body
# sums over the *global* `augmented_data` instead. Callers below pass
# `combined_data` (never defined anywhere) and `data_rank`; this only
# avoids an error because R's lazy evaluation never forces the unused
# argument. Either make the body use `data`, or drop the argument.
# The factor 9 turns the average over the 9n reflected points into an
# average over the n original observations.
kernel_density_reflection_estimation <- function(u, v,data, h) {
  n <- nrow(augmented_data)
  density <- 0
  for (i in 1:n) {
    density <- density + gaussian_kernel((u - augmented_data[i, 1]), (v - augmented_data[i, 2]), h)
  }
  density <- density * 9 / (n)
  return(density)
}

# Bandwidth scan for the reflection estimator: log-likelihood of the
# pseudo-observations for 10 bandwidths in [0, 0.3].
# NOTE(review): the first bandwidth is 0, for which the Gaussian kernel
# divides by zero — confirm whether the scan should start above 0.
test_h <- seq(0, 0.3, length.out = 10)
log_lik_ker <- c()
for (i in test_h){
  log_lik_ker <- append(log_lik_ker, sum(log(kernel_density_reflection_estimation(data_rank[,1], data_rank[,2],augmented_data,i))))
}

plot(test_h, log_lik_ker)

# Surfaces of the reflection estimator for several bandwidths.
# Fixed: the original passed `combined_data`, an object never defined in
# the script; it only avoided an error because the estimator never
# evaluates its `data` argument (lazy evaluation). Pass the reflected
# sample `augmented_data`, which is what the estimator actually uses.
plot_ly(x = U1, y = U2, z = matrix(kernel_density_reflection_estimation(grid$U1, grid$U2, augmented_data, 0.01), nrow  = 20, ncol  =20)
, type  = "surface")
plot_ly(x = U1, y = U2, z = matrix(kernel_density_reflection_estimation(grid$U1, grid$U2, augmented_data, 0.05), nrow  = 20, ncol  =20)
, type  = "surface")
plot_ly(x = U1, y = U2, z = matrix(kernel_density_reflection_estimation(grid$U1, grid$U2, augmented_data, 0.07), nrow  = 20, ncol  =20)
, type  = "surface")
plot_ly(x = U1, y = U2, z = matrix(kernel_density_reflection_estimation(grid$U1, grid$U2, augmented_data, 0.1), nrow  = 20, ncol  =20)
, type  = "surface")
loglik_reflected <- sum(log(kernel_density_reflection_estimation(data_rank[,1], data_rank[,2],augmented_data,0.1)))
loglik_reflected
## [1] 565.5902

#3.2 Beta Kernel Estimation

# Univariate beta kernel: the Beta(alpha, beta) density evaluated at x
# (the bivariate kernel is formed in the caller as a product of two of
# these). Vectorized in all arguments.
# Fix: uses stats::dbeta, which works on the log scale and avoids the
# overflow/underflow of the naive x^(a-1) * (1-x)^(b-1) / B(a, b)
# formula when the shape parameters are large (small bandwidths give
# shapes of order 1/h). It also avoids the confusing shadowing of
# base::beta by the numeric argument `beta`.
bivariate_beta_kernel <- function(x, alpha, beta) {
  dbeta(x, shape1 = alpha, shape2 = beta)
}


# Bivariate beta-kernel copula density estimate at (u, v): each
# observation contributes the product of two beta kernels whose shape
# parameters are driven by the evaluation point and the bandwidth
# (beta-kernel smoothing on [0, 1]).
# data : n x 2 matrix of pseudo-observations; u, v : evaluation points
# (vectorized); bandwidth : smoothing parameter h > 0.
bivariate_beta_kernel_density <- function(data, u, v, bandwidth) {
  # Shape parameters for the kernels centred at (u, v).
  shape_a_u <- u / bandwidth + 1
  shape_b_u <- (1 - u) / bandwidth + 1
  shape_a_v <- v / bandwidth + 1
  shape_b_v <- (1 - v) / bandwidth + 1
  total <- 0
  for (row in seq_len(nrow(data))) {
    total <- total +
      bivariate_beta_kernel(data[row, 1], shape_a_u, shape_b_u) *
      bivariate_beta_kernel(data[row, 2], shape_a_v, shape_b_v)
  }
  total / nrow(data)
}

# Bandwidth scan for the beta-kernel estimator.
# NOTE(review): the first grid value h = 0 makes the shape parameters
# infinite — confirm whether the grid should start above 0.
test_h <- seq(0,0.3, length.out = 10 )
log_lik_ker <- c()
for (i in test_h){
  log_lik_ker <- append(log_lik_ker, sum(log(bivariate_beta_kernel_density(data_rank, data_rank[,1], data_rank[,2], i))))
}

plot(test_h, log_lik_ker)

# Beta-kernel density surfaces for several bandwidths.
plot_ly(x = U1, y = U2 ,z = matrix(bivariate_beta_kernel_density(data_rank, grid$U1, grid$U2, 0.01), nrow = length(U1), ncol = length(U1)), type  = "surface")
plot_ly(x = U1, y = U2 ,z = matrix(bivariate_beta_kernel_density(data_rank, grid$U1, grid$U2, 0.03), nrow = length(U1), ncol = length(U1)), type  = "surface")
plot_ly(x = U1, y = U2 ,z = matrix(bivariate_beta_kernel_density(data_rank, grid$U1, grid$U2, 0.05), nrow = length(U1), ncol = length(U1)), type  = "surface")
plot_ly(x = U1, y = U2 ,z = matrix(bivariate_beta_kernel_density(data_rank, grid$U1, grid$U2, 0.1), nrow = length(U1), ncol = length(U1)), type  = "surface")
#plot_ly(x = U1, y = U2 ,z = z_beta, type  = "contour")
#3. Model Comparison
# Bandwidths retained for each kernel estimator — presumably chosen from
# the log-likelihood scans above (note h_beta = 0.02 is not on the scan
# grid; confirm).
h_gaus <- 0.07
h_reflec <- 0.05
h_beta <- 0.02
# Pointwise squared error between the empirical copula density and one of
# the fitted/estimated copula densities; integrating it over [0, 1]^2
# gives a MISE-style comparison criterion.
# x : numeric vector c(u, v); hypothetical_copula : one of 'gker',
# 'gkerf', 'beta', 'student' — any other value falls through to the
# Clayton copula.
# Relies on globals defined above: F_n, G_n, data_rank, h_gaus, h_reflec,
# h_beta, fitstudent, Clayton.obs, and the estimator functions.
squared_error <- function(x, hypothetical_copula) {
  u <- x[1]
  v <- x[2]
  empirical_density <- empirique_Copula(F_n, G_n, u, v, 0.05)
  hypothetical_density <- switch(hypothetical_copula,
    gker    = kernel_density_estimation(u, v, data_rank, h_gaus),
    gkerf   = kernel_density_reflection_estimation(u, v, data_rank, h_reflec),
    beta    = bivariate_beta_kernel_density(data_rank, u, v, h_beta),
    student = dCopula(c(u, v), copula = fitstudent@copula),
    dCopula(c(u, v), copula = Clayton.obs) # default: Clayton
  )
  (empirical_density - hypothetical_density)^2
}


mise_result <- adaptIntegrate(squared_error,lower = c(0, 0),
                              upper = c(1, 1),
                              hypothetical_copula = 'gker', maxEval = 100)

MISE_compare <- data.frame()
for (i in c('gker', 'gkerf','beta', 'student', 'clayton')){
  mise <- adaptIntegrate(squared_error,lower = c(0, 0),upper = c(1, 1),hypothetical_copula = i , maxEval=100)
  MISE_compare <- rbind(MISE_compare, c(i, mise$integral))
}
  
plot_ly(x = MISE_compare[,1], y = MISE_compare[,2], type = 'bar')